library(xlsx)
library(ggplot2)
library(psych)

df<-read.xlsx("dataset.xlsx", 1)
df1<-df[-which(df$exclude =="yes"),]


df1$HB_status<- array("Normal", nrow(df1))
df1$HB_status[which(df1$HGB<=11.5)]<-"Anemic"

df2<-df1[which(df1$zygosity =="heterozygous"),]



mean(df2$X.BC -df2$G6PD_HGB)
lin.fit<-lm(df2$G6PD_HGB ~df2$X.BC)
df2$residuals<- lin.fit$residuals
df2$means<-rowMeans(cbind(df2$X.BC,df2$G6PD_HGB))
sdv1<-sd(df2$residuals)

p1<-ggplot(df2, aes(x=means,y=residuals)) +geom_point(aes(shape=factor(HB_status))) + geom_hline(aes(yintercept=0))+
  geom_hline(aes(yintercept= 2*sdv1),col ="red")+geom_hline(aes(yintercept= -2*sdv1),col ="red")+
  facet_grid(.~country ,scales="free",space="free") + xlab("means of % bright cells and G6PD activity U/gHb") +
  ylab("residuals from best fit line")  +theme(legend.title=element_blank())

df2_2<-df1[which(df1$zygosity =="heterozygous"),]

mean(df2_2$X.BC -df2_2$G6PD_RBC)
lin.fit<-lm(df2_2$G6PD_RBC ~df2_2$X.BC)
df2_2$residuals<- lin.fit$residuals
df2_2$means<-rowMeans(cbind(df2_2$X.BC,df2_2$G6PD_RBC))
sdv2<-sd(df2_2$residuals)

p2<-ggplot(df2_2, aes(x=means,y=residuals)) +geom_point(aes(shape=factor(HB_status))) + geom_hline(aes(yintercept=0))+
  geom_hline(aes(yintercept= 2*sdv2),col ="red")+geom_hline(aes(yintercept= -2*sdv2),col ="red")+
  facet_grid(.~country ,scales="free",space="free") + xlab("means of % bright cells and G6PD activity U/RBC") +
  ylab("residuals from best fit line")  +theme(legend.title=element_blank())

grid.arrange(p1, p2, ncol = 1)